clear all

set more off
set matsize 1500

*Starting with US Data:

*Using the data from the second quarter:

use "fmli142", clear 

*Then using the expenditures reported for previous quarter (ends with "pq"):

*renaming total expenditures of previous quarter (Q1: Jan - Feb - Mar):

rename totexppq totexppq_Q1

*renaming Electricity expenditures of previous quarter (Q1: Jan - Feb - Mar):

rename elctrcpq elctrcpq_Q1

*renaming "all fuel" expenditures of previous quarter (Q1: Jan - Feb - Mar):

rename allfulpq allfulpq_Q1

*renaming Natural Gas expenditures of previous quarter (Q1: Jan - Feb - Mar):

rename ntlgaspq ntlgaspq_Q1

*renaming Gasoline expenditures of previous quarter (Q1: Jan - Feb - Mar):

rename gasmopq gasmopq_Q1

*renaming the weights of previous quarter (Q1: Jan - Feb - Mar):

rename finlwt21 finlwt21_Q1

*Merging the data for the third quarter and dropping the unmatched respondents:

merge 1:1 cuid using "fmli143", keep(match) nogenerate

*renaming total expenditures of previous quarter (Q2: Apr - May - Jun):

rename totexppq totexppq_Q2

*renaming Electricity expenditures of previous quarter (Q2: Apr - May - Jun):

rename elctrcpq elctrcpq_Q2

*renaming "All Fuel" expenditures of previous quarter (Q2: Apr - May - Jun):

rename allfulpq allfulpq_Q2

*renaming Natural Gas expenditures of previous quarter (Q2: Apr - May - Jun):

rename ntlgaspq ntlgaspq_Q2

*renaming Gasoline expenditures of previous quarter (Q2: Apr - May - Jun):

rename gasmopq gasmopq_Q2

*renaming the weights of previous quarter (Q2: Apr - May - Jun):

rename finlwt21 finlwt21_Q2

*Merging the data for the fourth quarter and dropping the unmatched respondents:

merge 1:1 cuid using "fmli144", keep(match) nogenerate

*renaming total expenditures of previous quarter (Q3: Jul - Aug - Sep):

rename totexppq totexppq_Q3

*renaming Electricity expenditures of previous quarter (Q3: Jul - Aug - Sep):

rename elctrcpq elctrcpq_Q3

*renaming "All Fuel" expenditures of previous quarter (Q3: Jul - Aug - Sep):

rename allfulpq allfulpq_Q3

*renaming Natural Gas expenditures of previous quarter (Q3: Jul - Aug - Sep):

rename ntlgaspq ntlgaspq_Q3

*renaming Gasoline expenditures of previous quarter (Q3: Jul - Aug - Sep):

rename gasmopq gasmopq_Q3

*renaming the weights of previous quarter (Q3: Jul - Aug - Sep):

rename finlwt21 finlwt21_Q3

*Merging the data for the fifth quarter (Q1 of 2015) and dropping the unmatched respondents:

merge 1:1 cuid using "fmli151", keep(match) nogenerate

*renaming total expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename totexppq totexppq_Q4

*renaming Electricity expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename elctrcpq elctrcpq_Q4

*renaming "All Fuel" expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename allfulpq allfulpq_Q4

*renaming Natural Gas expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename ntlgaspq ntlgaspq_Q4

*renaming Gasoline expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename gasmopq gasmopq_Q4

*renaming the weights expenditures of previous quarter (Q4: Oct - Nov - Dec):

rename finlwt21 finlwt21_Q4

*Averaging the weights:

gen weight = (finlwt21_Q1+finlwt21_Q2+finlwt21_Q3+finlwt21_Q4)/4

*Aggregating quaterly expenditures:

gen TotalExp = totexppq_Q1 + totexppq_Q2 + totexppq_Q3 + totexppq_Q4

*Creating deciles:

xtile exp_decile = TotalExp[pw = weight], n(10)

*Aggregating the expenditure categories:

gen Electricity = elctrcpq_Q1 + elctrcpq_Q2 + elctrcpq_Q3 + elctrcpq_Q4

gen Fuel = allfulpq_Q1 + allfulpq_Q2 + allfulpq_Q3 + allfulpq_Q4

gen Gasoline = gasmopq_Q1 + gasmopq_Q2 + gasmopq_Q3 + gasmopq_Q4

gen NatGas = ntlgaspq_Q1 + ntlgaspq_Q2 + ntlgaspq_Q3 + ntlgaspq_Q4

*Shares (Expenditure share for each household):

gen E_share = (Electricity/TotalExp)*100

gen F_share = (Fuel/TotalExp)*100

gen NG_share = (NatGas / TotalExp)*100

gen G_share = (Gasoline / TotalExp)*100

*explore covariates

gen logy=log(Tot)
gen loge=log(Elec)

destring cutenure,replace
destring bls_urbn,replace
destring region,replace
destring popsize,replace
destring state,replace
mvencode state popsize region,mv(999)

save "Q1-Q4 data",replace

log using "table2",replace text
sum Elec loge Tot logy E_share
log close

*===========================================================
*CU character only
*exclude variables: horref1, hisp_ref, prinernm, inc_rnkm,
*                   prinern1, prinern2, prinern3, prinern4, prinern5,
*                   inc_rnk1, inc_rnk2, inc_rnk3, inc_rnk4, inc_rnk5
*===========================================================
* Generate data for R

*1. Keep relevant variables and generate dummy variables
use "Q1-Q4 data.dta", clear

keep TotalExp Electricity loge logy as_comp1 as_comp2 as_comp3 as_comp4 fam_size inc_rank no_earnr ///
         num_auto perslt18 persot64 vehq erankh vehql num_tvan erankhm ///
		 bls_urbn cutenure earncomp fam_type popsize ///
		 region smsastat childage inclass ///
		 state	inclass2 psu weight
		 
foreach variable in "bls_urbn" "cutenure" "earncomp" "fam_type" "popsize" ///
    "region" "smsastat" "childage" "inclass" ///
	"state"	"inclass2" "psu" {
	
	tab `variable', gen(i_`variable')
	drop `variable'
	}
save "Q1-Q4 Rdata CU weight.dta", replace

*2. change missing geocodes to zero
use "Q1-Q4 Rdata CU weight.dta", clear
mvencode i_state1 i_state2 i_state3 i_state4 i_state5 i_state6 i_state7 i_state8 i_state9 i_state10 ///
     i_state11 i_state12 i_state13 i_state14 i_state15 i_state16 i_state17 i_state18 i_state19 i_state20 ///
	 i_state21 i_state22 i_state23 i_state24 i_state25 i_state26 i_state27 i_state28 i_state29 i_state30 ///
	 i_state31 i_state32 i_state33 i_state34 i_state35 i_state36 i_state37 i_state38 ///
	 i_psu1 i_psu2 i_psu3 i_psu4 i_psu5 i_psu6 i_psu7 i_psu8 i_psu9 i_psu10 ///
	 i_psu11 i_psu12 i_psu13 i_psu14 i_psu15 i_psu16 i_psu17 i_psu18 i_psu19 i_psu20 i_psu21 ///
	 ,mv(0) over


save "Q1-Q4 Rdata CU v2 weight.dta", replace
